* ---- session setup ----
* disable output paging and start from an empty Results window
set more off
cls

* working directory: replace the placeholder with the project's base directory before running
cd "SET BASE DIRECTORY"

* folder globals, relative to the base directory:
*   $jobs    - raw BLS flat files and intermediate working datasets
*   $metrics - finished metric datasets and csv exports
global metrics ".\metrics"
global jobs ".\jobs"

* current employment (payroll survey) flat file from https://download.bls.gov/pub/time.series/ce
* builds "$jobs\working jobs file": one row per series_id x month from 2001 onward
insheet using "$jobs/ce.data.0.AllCESSeries", clear
* strip any padding around the series id: a later step selects series_id=="CES0000000001" by exact match,
* and the LE and LN imports in this file trim their ids the same way
replace series_id=trim(series_id)
keep if year>=2001
* keep ids with "S" in position 3 and "01" in positions 12-13
* NOTE(review): presumably seasonally adjusted / all-employees series - confirm against the BLS ce documentation
keep if substr(series_id, 3, 1)=="S"
keep if substr(series_id, 12, 2)=="01"
drop foot
* M13 is not a calendar month (presumably the annual average - confirm), so it cannot be turned into a monthly date
drop if period=="M13"
destring period, ignore("M") replace
* force: any non-numeric value becomes missing
destring value, force replace
gen date=ym(year, period)
* quick check of the date range that was loaded
sum date
* numeric version of the 8 characters in positions 4-11 of the id
gen industry_code=substr(series_id, 4, 8)
destring industry_code, force replace
save "$jobs\working jobs file", replace

* raw JOLTS flat file from https://download.bls.gov/pub/time.series/jt
* builds "$jobs\working jolts file": one row per month, one value<metric> column per metric code
import delimited "$jobs\jt.data.1.AllItems", clear

* make value and period numeric (force: non-numeric values become missing)
destring value, replace force
destring period, replace ignore("M")
* period 13 is not a calendar month, so drop it before building the monthly date
drop if period == 13
generate date = ym(year, period)

* keep ids whose characters 4-18 are all zeros, then display which ids are left
* NOTE(review): presumably the total / all-industry national aggregate - confirm against the BLS jt documentation
keep if substr(series_id, 4, 15) == "000000000000000"
tabulate series_id

* character 3 must be "S" and character 21 must be "L"
keep if substr(series_id, 3, 1) == "S" & substr(series_id, 21, 1) == "L"

* the two characters in positions 19-20 name the metric (HI, TS, QU and JO are used later in this file)
generate metric = substr(series_id, 19, 2)
* diagnostic only: the reshape below needs metric/date pairs to be unique
duplicates list metric date

keep metric value date year period
reshape wide value, i(date year period) j(metric) string
save "$jobs\working jolts file", replace

* raw usual weekly earnings flat file from https://download.bls.gov/pub/time.series/le
* builds "$metrics\earnings": quarterly median earnings and seasonally adjusted 25th percentile earnings
import delimited "$jobs\le.data.1.AllData", clear

* drop the A01 rows, then convert the remaining period codes and values to numbers
drop if period=="A01"
destring value, replace force
destring period, replace ignore("Q")
* period 5 is not one of the four quarters
drop if period == 5
drop footnote

* trim padding from the ids and keep only the three series used below
replace series_id = trim(series_id)
keep if inlist(series_id, "LES1252881600", "LES1252881500", "LEU0252911300")

* one row per quarter, one column per series, each column named by its series id
generate date = yq(year, period)
reshape wide value, i(year period date) j(series_id) string
rename value* *

* deflate 25th percentile earnings by the ratio of the current and constant measures for median earnings
generate pct25constant = LEU0252911300/(LES1252881500/LES1252881600)

* seasonally adjust the 25th percentile series (x13as needs the data declared as a quarterly time series)
tsset date, quarterly
x13as pct25
list

keep year period date LES1252881600 pct25constant_sa
rename (LES1252881600 pct25constant_sa) (earnings_median earnings_pct25)
save "$metrics\earnings", replace


* clean the household survey flat file from https://download.bls.gov/pub/time.series/ln/
* builds "$jobs\household survey series": one row per series_id x month from 2001 onward
import delimited "$jobs\ln.data.1.AllData", clear
keep if year >= 2001
drop foot

* keep monthly observations only: remove M13 and every period code starting with Q or A
drop if period == "M13"
drop if inlist(substr(period, 1, 1), "Q", "A")

* numeric month, then a Stata monthly date
destring period, replace ignore("M")
generate date = ym(year, period)

* trim padding from the ids; force turns non-numeric values into missing
replace series_id = trim(series_id)
destring value, replace force

* quick check of the date range that was loaded
summarize date
save "$jobs\household survey series", replace


* starts with my cleaned version of the household survey flat file
* builds "$metrics\household": one row per month holding the derived rates defined below
use "$jobs\household survey series", clear

* keep every id with "S" in position 3, plus LNU00000060 (the denominator of epop below)
keep if substr(series_id, 3, 1) == "S" | series_id == "LNU00000060"

* one column per series, each column named by its series id
reshape wide value, i(year period date) j(series_id) string
rename value* *

* NOTE(review): what each LN series measures is not documented in this file - confirm against the BLS ln series list
generate core       = (LNS13000000+ LNS15026642-LNS13023653)/(LNS11000000+ LNS15026642)
generate u3         = LNS13000000/LNS11000000
generate unemployed = LNS13000000
generate epop       = LNS12000060/LNU00000060
generate u6         = ( LNS13000000 + LNS15026642 + LNS12032194 ) / (LNS11000000 + LNS15026642 )
generate ltunemp    = LNS13008636/LNS11000000

* only the date keys and the derived measures are saved; all raw LN* columns are left behind
keep year period date core u3 unemployed epop u6 ltunemp
save "$metrics\household", replace


* starts with my cleaned version of the payroll survey flat file; merge in JOLTS, household series, and adjusted epop
* (use takes the clear option, not replace, to discard the data currently in memory)
use "$jobs\working jobs file", clear
* CES0000000001 is the single series that supplies the payroll level
keep if series_id=="CES0000000001"
rename value payroll
keep year period date payroll

* JOLTS levels arrive as valueHI, valueTS, valueQU, valueJO (one column per metric code)
merge 1:1 date using "$jobs\working jolts file", nogen
* (a+b-|a-b|)/2 is min(a,b), so churn is the smaller of valueHI and valueTS as a share of payroll
gen churn=((valueHI+valueTS)-abs(valueHI-valueTS))/2/payroll
gen hires= valueHI/ payroll
gen quits=valueQU/payroll

merge 1:1 date using "$metrics\household", nogen
* adjusted epop is calculated from another program. see lines 453-518 of "jobs day series detail" do file
* (the FOR REFERENCE section at the end of this file saves a dataset under the same name)
merge 1:1 date using "$jobs\adjusted epop", nogen keepusing(epop_sa)
rename epop_sa epopadj

* declare the monthly time series so the lag operator below works
tsset date, monthly
* one-month change in payroll
gen paych=payroll-L1.payroll
* valueJO per unemployed person
gen joperu3=valueJO/unemployed
* take 6 month trailing average (5 lags + current month, no leads), and calculate percentile
* for variables where lower = better (core u3 u6 ltunemp) the percentile is reversed: 101 - percentile
foreach series of varlist churn hires quits core u3 u6 ltunemp epop epopadj paych joperu3 {
	tssmooth ma `series'6mo = `series', window(5 1 0)
	xtile `series'pct = `series'6mo, nquantiles(100)
	if inlist("`series'", "core", "u3", "u6", "ltunemp") {
		replace `series'pct = 101 - `series'pct
	}
}


* linear regression of u3 on hires, estimated on pre-2020 observations; store the linear prediction
regress u3 hires if year < 2020
predict u3_hires, xb

* monthly JOLTS-based series from 2001 onward
outsheet year date hires quits churn joperu3 u3 if year >= 2001 using "$metrics\jolts.csv", replace comma

* summary table: percentile columns for two months, transposed so each variable becomes a row
preserve
keep year period date *pct
* monthly dates 779 and 791 are 2024m12 and 2025m12 - update when newer data arrive
keep if date == 779 | date == 791
xpose, clear varname
order _varname v1 v2
outsheet using "$metrics\summary.csv", replace comma
restore

* despite the name, this is the row-wise standard deviation across the percentile columns
* NOTE(review): not saved or exported before the next dataset is loaded
egen variance = rowsd(*pct)





* percentile (1-100) of each earnings series within the quarterly earnings file
use "$metrics\earnings", clear
tsset date, quarterly

foreach series of varlist earnings* {
	xtile `series'pct = `series', nquantiles(100)
}











* FOR REFERENCE: code for age-adjusted EPOP, and EPOP by 5-year age group

* age adjusted epop
* builds "$jobs\adjusted epop": monthly epop averaged across age groups with fixed 2024 population weights

use "$jobs\household survey series", clear
* keep only the series listed in the "seriesid for epop" lookup file
merge m:1 series_id using "seriesid for epop", nogen keep(matched)

reshape wide value, i(year period date) j(series_id) string
rename value* *

* for each age group: epop<age> = LNU02<suffix> / LNU00<suffix>, and the LNU00 column becomes pop<age>
local ages     16 18 20 25 30 35 40 45 50 55 60 65 70 75
local suffixes 000086 000088 000036 024932 024933 024934 024935 024936 024937 000094 000096 024938 024941 024942
local ngroups : word count `ages'
forvalues i = 1/`ngroups' {
	local age : word `i' of `ages'
	local sfx : word `i' of `suffixes'
	generate epop`age' = LNU02`sfx'/LNU00`sfx'
	rename LNU00`sfx' pop`age'
}

keep year period date pop* epop*

* long panel: one row per age group x month
reshape long pop epop, i(year period date) j(age)

xtset age date, monthly
* monthly date 789 is 2025m10: replace it with the average of the neighbouring months, within each age group
replace epop = (L1.epop + F1.epop)/2 if date == 789

* calculate age distribution for 2024 to merge onto the age-specific EPOP series
preserve
collapse (mean) fixedpop=pop if year==2024, by(age)
save "age distribution", replace
restore

* weighted average of the age-specific epops, using the fixed 2024 population as weights
merge m:1 age using "age distribution"
collapse (mean) epop [aweight=fixedpop], by(year period date)

* seasonally adjust the aggregated series
tsset date, monthly
x13as epop

save "$jobs\adjusted epop", replace
outsheet year period epop epop_sa using "$jobs\adjusted epop.csv", replace comma

* by 5 year age group
* builds "$jobs\epop by agegroup.csv": average epop per age group in two 12-month windows, plus the change

use "$jobs\household survey series", clear
* keep only the series listed in the "seriesid for epop" lookup file
merge m:1 series_id using "seriesid for epop", nogen keep(matched)

reshape wide value, i(year period date) j(series_id) string
rename value* *

* combine 16-17 and 18-19 into a single group
generate epop16 = (LNU02000086+LNU02000088)/(LNU00000086+LNU00000088)

* remaining groups: epop<age> = LNU02<suffix> / LNU00<suffix>
local ages     20 25 30 35 40 45 50 55 60 65 70 75
local suffixes 000036 024932 024933 024934 024935 024936 024937 000094 000096 024938 024941 024942
local ngroups : word count `ages'
forvalues i = 1/`ngroups' {
	local age : word `i' of `ages'
	local sfx : word `i' of `suffixes'
	generate epop`age' = LNU02`sfx'/LNU00`sfx'
}

keep year period date epop*

* adjust this for latest 12 month period
* (monthly dates 768-779 are labelled 2024 and 780-791 are labelled 2025; everything else is set to missing)
recode date (768/779=2024) (780/791=2025) (else=.), generate(fakeyear)
* omit October of all years since Oct 2025 missing due to shutdown
drop if period == 10

* average within each window, discard the rows outside both windows
collapse (mean) epop*, by(fakeyear)
drop if missing(fakeyear)

* pivot so each age group is a row with one column per window
reshape long epop, i(fakeyear) j(age)
reshape wide epop, i(age) j(fakeyear)

generate diff = epop2025 - epop2024
outsheet using "$jobs\epop by agegroup.csv", replace comma

